pip install plotly
Requirement already satisfied: plotly in d:\programdata\anaconda3\lib\site-packages (5.6.0) Requirement already satisfied: tenacity>=6.2.0 in d:\programdata\anaconda3\lib\site-packages (from plotly) (8.0.1) Requirement already satisfied: six in d:\programdata\anaconda3\lib\site-packages (from plotly) (1.16.0) Note: you may need to restart the kernel to use updated packages.
import pandas as pd
import numpy as np
import plotly.express as px
import matplotlib.pyplot as plt
import plotly.offline as pyo
pyo.init_notebook_mode()
print('modules are imported')
modules are imported
# Country-level COVID-19 aggregates (one row per date and country) served as
# a raw CSV from GitHub.
dataset_url = 'https://raw.githubusercontent.com/datasets/covid-19/master/data/countries-aggregated.csv'
df = pd.read_csv(filepath_or_buffer=dataset_url)
df.head()
| Date | Country | Confirmed | Recovered | Deaths | |
|---|---|---|---|---|---|
| 0 | 2020-01-22 | Afghanistan | 0 | 0 | 0 |
| 1 | 2020-01-23 | Afghanistan | 0 | 0 | 0 |
| 2 | 2020-01-24 | Afghanistan | 0 | 0 | 0 |
| 3 | 2020-01-25 | Afghanistan | 0 | 0 | 0 |
| 4 | 2020-01-26 | Afghanistan | 0 | 0 | 0 |
df.tail()
| Date | Country | Confirmed | Recovered | Deaths | |
|---|---|---|---|---|---|
| 151069 | 2022-02-18 | Zimbabwe | 233030 | 0 | 5385 |
| 151070 | 2022-02-19 | Zimbabwe | 233224 | 0 | 5386 |
| 151071 | 2022-02-20 | Zimbabwe | 233352 | 0 | 5386 |
| 151072 | 2022-02-21 | Zimbabwe | 233571 | 0 | 5386 |
| 151073 | 2022-02-22 | Zimbabwe | 233980 | 0 | 5388 |
df.shape
(151074, 5)
# Keep only the rows where at least one confirmed case has been recorded.
has_cases = df['Confirmed'] > 0
df = df[has_cases]
df.head()
| Date | Country | Confirmed | Recovered | Deaths | |
|---|---|---|---|---|---|
| 33 | 2020-02-24 | Afghanistan | 5 | 0 | 0 |
| 34 | 2020-02-25 | Afghanistan | 5 | 0 | 0 |
| 35 | 2020-02-26 | Afghanistan | 5 | 0 | 0 |
| 36 | 2020-02-27 | Afghanistan | 5 | 0 | 0 |
| 37 | 2020-02-28 | Afghanistan | 5 | 0 | 0 |
df[df.Country == 'Italy']
| Date | Country | Confirmed | Recovered | Deaths | |
|---|---|---|---|---|---|
| 65627 | 2020-01-31 | Italy | 2 | 0 | 0 |
| 65628 | 2020-02-01 | Italy | 2 | 0 | 0 |
| 65629 | 2020-02-02 | Italy | 2 | 0 | 0 |
| 65630 | 2020-02-03 | Italy | 2 | 0 | 0 |
| 65631 | 2020-02-04 | Italy | 2 | 0 | 0 |
| ... | ... | ... | ... | ... | ... |
| 66376 | 2022-02-18 | Italy | 12377098 | 0 | 152596 |
| 66377 | 2022-02-19 | Italy | 12427773 | 0 | 152848 |
| 66378 | 2022-02-20 | Italy | 12469975 | 0 | 152989 |
| 66379 | 2022-02-21 | Italy | 12494459 | 0 | 153190 |
| 66380 | 2022-02-22 | Italy | 12554596 | 0 | 153512 |
754 rows × 5 columns
# Animated world map of cumulative confirmed cases.
# Plotly Express creates animation frames in the order the 'Date' values are
# first encountered, and `df` is ordered by country, so the rows are sorted by
# date here to make the animation play chronologically. ISO 'YYYY-MM-DD'
# strings sort correctly as plain text.
fig = px.choropleth(
    df.sort_values('Date'),
    locations='Country',
    locationmode='country names',
    color='Confirmed',
    animation_frame='Date',
)
fig.update_layout(title_text='Global Spread of COVID-19')
fig.show()
# Animated world map of cumulative deaths.
# Plotly Express creates animation frames in the order the 'Date' values are
# first encountered, and `df` is ordered by country, so the rows are sorted by
# date here to make the animation play chronologically. ISO 'YYYY-MM-DD'
# strings sort correctly as plain text.
fig = px.choropleth(
    df.sort_values('Date'),
    locations='Country',
    locationmode='country names',
    color='Deaths',
    animation_frame='Date',
)
fig.update_layout(title_text='Global Death of COVID-19')
fig.show()
# Narrow the data down to the rows for China.
is_china = df['Country'] == 'China'
df_china = df[is_china]
df_china.head()
| Date | Country | Confirmed | Recovered | Deaths | |
|---|---|---|---|---|---|
| 28231 | 2020-01-22 | China | 548 | 28 | 17 |
| 28232 | 2020-01-23 | China | 643 | 30 | 18 |
| 28233 | 2020-01-24 | China | 920 | 36 | 26 |
| 28234 | 2020-01-25 | China | 1406 | 39 | 42 |
| 28235 | 2020-01-26 | China | 2075 | 49 | 56 |
let's select the columns that we need
# Keep only the date and the cumulative case count. An explicit copy is taken
# because a new column is assigned to this frame afterwards; assigning into a
# slice of another DataFrame triggers pandas' SettingWithCopyWarning.
df_china = df_china[['Date', 'Confirmed']].copy()
df_china.head()
| Date | Confirmed | |
|---|---|---|
| 28231 | 2020-01-22 | 548 |
| 28232 | 2020-01-23 | 643 |
| 28233 | 2020-01-24 | 920 |
| 28234 | 2020-01-25 | 1406 |
| 28235 | 2020-01-26 | 2075 |
Calculating the first derivative of the Confirmed column
# Day-over-day change of the cumulative count; the first row has no
# predecessor, so its value is NaN.
df_china['Infection Rate'] = df_china.Confirmed.diff()
df_china.head()
| Date | Confirmed | Infection Rate | |
|---|---|---|---|
| 28231 | 2020-01-22 | 548 | NaN |
| 28232 | 2020-01-23 | 643 | 95.0 |
| 28233 | 2020-01-24 | 920 | 277.0 |
| 28234 | 2020-01-25 | 1406 | 486.0 |
| 28235 | 2020-01-26 | 2075 | 669.0 |
# Cumulative cases and the daily increase for China on one line chart.
px.line(data_frame=df_china, x='Date', y=['Confirmed', 'Infection Rate'])
# Largest single-day increase in China's confirmed cases.
df_china['Infection Rate'].max()
15136.0
df.head()
| Date | Country | Confirmed | Recovered | Deaths | |
|---|---|---|---|---|---|
| 33 | 2020-02-24 | Afghanistan | 5 | 0 | 0 |
| 34 | 2020-02-25 | Afghanistan | 5 | 0 | 0 |
| 35 | 2020-02-26 | Afghanistan | 5 | 0 | 0 |
| 36 | 2020-02-27 | Afghanistan | 5 | 0 | 0 |
| 37 | 2020-02-28 | Afghanistan | 5 | 0 | 0 |
# Peak daily increase in confirmed cases for every country.
# The diff is taken within each country group, so the first row of a country
# is NaN instead of being compared with the previous country's last row.
# One grouped pass replaces re-filtering the whole frame once per country.
# sort=False keeps the countries in order of first appearance, matching
# df['Country'].unique().
daily_new_cases = df.groupby('Country', sort=False)['Confirmed'].diff()
peak_by_country = daily_new_cases.groupby(df['Country'], sort=False).max()
countries = peak_by_country.index.tolist()
max_infection_rates = peak_by_country.tolist()
print(max_infection_rates)
[3243.0, 4789.0, 2521.0, 2313.0, 5035.0, 0.0, 468.0, 139853.0, 4388.0, 175271.0, 48357.0, 7779.0, 1497.0, 8173.0, 16230.0, 1329.0, 8921.0, 133480.0, 1517.0, 2566.0, 527.0, 23611.0, 5254.0, 8530.0, 287149.0, 7380.0, 12399.0, 1005.0, 7083.0, 4710.0, 1469.0, 1130.0, 9668.0, 58891.0, 4044.0, 596.0, 41651.0, 15136.0, 35575.0, 275.0, 1188.0, 4481.0, 18188.0, 2858.0, 11812.0, 9907.0, 5457.0, 57378.0, 55709.0, 99.0, 415.0, 392.0, 7439.0, 17670.0, 4260.0, 12677.0, 1750.0, 282.0, 8438.0, 1642.0, 5185.0, 1854.0, 19898.0, 503349.0, 1871.0, 587.0, 26320.0, 401828.0, 2521.0, 50182.0, 902.0, 5826.0, 534.0, 191.0, 1186.0, 737.0, 7.0, 12890.0, 45047.0, 7408.0, 414188.0, 64718.0, 50228.0, 13515.0, 43199.0, 243295.0, 228123.0, 1968.0, 104345.0, 22720.0, 66121.0, 3749.0, 350.0, 171448.0, 4397.0, 6913.0, 11505.0, 2508.0, 11992.0, 10760.0, 6925.0, 447.0, 5694.0, 192.0, 12968.0, 5497.0, 7.0, 2295.0, 1316.0, 28825.0, 2838.0, 1217.0, 1403.0, 3.0, 1211.0, 6194.0, 109895.0, 0.0, 6199.0, 520.0, 24556.0, 2960.0, 12039.0, 4947.0, 3268.0, 10052.0, 380615.0, 3183.0, 718.0, 301.0, 6158.0, 2332.0, 26109.0, 6146.0, 12073.0, 371.0, 19722.0, 975.0, 25833.0, 99645.0, 38867.0, 57659.0, 65706.0, 4206.0, 40018.0, 202211.0, 3072.0, 238.0, 722.0, 2723.0, 10.0, 491.0, 319.0, 5928.0, 1722.0, 36737.0, 2068.0, 192.0, 26032.0, 28504.0, 23332.0, 681.0, 1066.0, 37875.0, 503.0, 372766.0, 11366.0, 1284.0, 112.0, 1404.0, 138985.0, 89462.0, 905.0, 723.0, 407.0, 24307.0, 23418.0, 532.0, 1002.0, 69.0, 1259.0, 19923.0, 823225.0, 1368120.0, 21324.0, 45022.0, 4471.0, 848169.0, 13612.0, 1478.0, 4.0, 4418.0, 56149.0, 30356.0, 55.0, 287.0, 5555.0, 9185.0]
# Pair each country with its peak daily increase in a two-column table.
df_MIR = pd.DataFrame(
    {
        'Country': countries,
        'Max Infection Rates': max_infection_rates,
    }
)
df_MIR.head()
| Country | Max Infection Rates | |
|---|---|---|
| 0 | Afghanistan | 3243.0 |
| 1 | Albania | 4789.0 |
| 2 | Algeria | 2521.0 |
| 3 | Andorra | 2313.0 |
| 4 | Angola | 5035.0 |
# One bar per country showing its peak daily increase, on a logarithmic y axis.
px.bar(
    df_MIR,
    x='Country',
    y='Max Infection Rates',
    color='Country',
    title='Global Maximum Infection Rate',
    log_y=True,
)
On 9 March 2020, the government of Italy under Prime Minister Giuseppe Conte imposed a national quarantine, restricting the movement of the population except for necessity, work, and health circumstances, in response to the growing pandemic of COVID-19 in the country. source
# Dates (ISO 'YYYY-MM-DD' strings) used to mark the Italian lockdown on the
# charts: the day it started and the same day one month later.
italy_lockdown_start_date, italy_lockdown_a_month_later = '2020-03-09', '2020-04-09'
df.head()
| Date | Country | Confirmed | Recovered | Deaths | |
|---|---|---|---|---|---|
| 33 | 2020-02-24 | Afghanistan | 5 | 0 | 0 |
| 34 | 2020-02-25 | Afghanistan | 5 | 0 | 0 |
| 35 | 2020-02-26 | Afghanistan | 5 | 0 | 0 |
| 36 | 2020-02-27 | Afghanistan | 5 | 0 | 0 |
| 37 | 2020-02-28 | Afghanistan | 5 | 0 | 0 |
let's get the data related to Italy
# Rows for Italy only. An explicit copy is taken because new columns are
# assigned to this frame afterwards; assigning into a slice of `df` triggers
# pandas' SettingWithCopyWarning.
df_italy = df[df.Country == 'Italy'].copy()
let's check the dataframe
df_italy.head()
| Date | Country | Confirmed | Recovered | Deaths | |
|---|---|---|---|---|---|
| 65627 | 2020-01-31 | Italy | 2 | 0 | 0 |
| 65628 | 2020-02-01 | Italy | 2 | 0 | 0 |
| 65629 | 2020-02-02 | Italy | 2 | 0 | 0 |
| 65630 | 2020-02-03 | Italy | 2 | 0 | 0 |
| 65631 | 2020-02-04 | Italy | 2 | 0 | 0 |
let's calculate the infection rate in Italy
# Day-over-day change of Italy's cumulative confirmed cases; the first row
# has no predecessor, so its value is NaN.
df_italy['Infection Rate'] = df_italy['Confirmed'].diff()
df_italy.head()
C:\Users\zinin\AppData\Local\Temp/ipykernel_16068/3001688291.py:1: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
| Date | Country | Confirmed | Recovered | Deaths | Infection Rate | |
|---|---|---|---|---|---|---|
| 65627 | 2020-01-31 | Italy | 2 | 0 | 0 | NaN |
| 65628 | 2020-02-01 | Italy | 2 | 0 | 0 | 0.0 |
| 65629 | 2020-02-02 | Italy | 2 | 0 | 0 | 0.0 |
| 65630 | 2020-02-03 | Italy | 2 | 0 | 0 | 0.0 |
| 65631 | 2020-02-04 | Italy | 2 | 0 | 0 | 0.0 |
ok! now let's do the visualization
# Daily new cases in Italy with the lockdown start and the one-month mark
# drawn as vertical lines.
fig = px.line(
    df_italy,
    x='Date',
    y='Infection Rate',
    title='Before and After Lockdown in Italy',
)

# The marker lines run from zero up to the highest point of the curve.
peak_rate = df_italy['Infection Rate'].max()

# (date, line colour, annotation text) for each vertical marker.
lockdown_markers = (
    (italy_lockdown_start_date, 'red', 'starting date of the lockdown'),
    (italy_lockdown_a_month_later, 'orange', 'one month later'),
)
for marker_date, line_color, label in lockdown_markers:
    fig.add_shape(
        type='line',
        x0=marker_date,
        y0=0,
        x1=marker_date,
        y1=peak_rate,
        line=dict(color=line_color, width=2),
    )
    fig.add_annotation(x=marker_date, y=peak_rate, text=label)

# Render explicitly, as the other figure cells do, rather than relying on the
# notebook echoing the value of the cell's last expression.
fig.show()
df_italy.head()
| Date | Country | Confirmed | Recovered | Deaths | Infection Rate | |
|---|---|---|---|---|---|---|
| 65627 | 2020-01-31 | Italy | 2 | 0 | 0 | NaN |
| 65628 | 2020-02-01 | Italy | 2 | 0 | 0 | 0.0 |
| 65629 | 2020-02-02 | Italy | 2 | 0 | 0 | 0.0 |
| 65630 | 2020-02-03 | Italy | 2 | 0 | 0 | 0.0 |
| 65631 | 2020-02-04 | Italy | 2 | 0 | 0 | 0.0 |
let's calculate the death rate
# Day-over-day change of Italy's cumulative deaths; the first row is NaN.
df_italy['Deaths Rate'] = df_italy['Deaths'].diff()
C:\Users\zinin\AppData\Local\Temp/ipykernel_16068/3823060142.py:1: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
let's check the dataframe again
df_italy.head()
| Date | Country | Confirmed | Recovered | Deaths | Infection Rate | Deaths Rate | |
|---|---|---|---|---|---|---|---|
| 65627 | 2020-01-31 | Italy | 2 | 0 | 0 | NaN | NaN |
| 65628 | 2020-02-01 | Italy | 2 | 0 | 0 | 0.0 | 0.0 |
| 65629 | 2020-02-02 | Italy | 2 | 0 | 0 | 0.0 | 0.0 |
| 65630 | 2020-02-03 | Italy | 2 | 0 | 0 | 0.0 | 0.0 |
| 65631 | 2020-02-04 | Italy | 2 | 0 | 0 | 0.0 | 0.0 |
now let's plot a line chart to compare the impact of the COVID-19 national lockdown on the spread of the virus and on the death rate
# Daily new cases and daily new deaths for Italy on one chart.
rate_columns = ['Infection Rate', 'Deaths Rate']
fig = px.line(data_frame=df_italy, x='Date', y=rate_columns)
fig.show()
let's normalize the columns
# Scale both daily series by their own maximum so they peak at 1 and can be
# compared on one axis. The result is written back to the same column it was
# computed from, so the charts that read 'Infection Rate' and 'Deaths Rate'
# see the scaled values.
for rate_col in ('Infection Rate', 'Deaths Rate'):
    df_italy[rate_col] = df_italy[rate_col] / df_italy[rate_col].max()
C:\Users\zinin\AppData\Local\Temp/ipykernel_16068/4108101386.py:1: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy C:\Users\zinin\AppData\Local\Temp/ipykernel_16068/4108101386.py:2: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
let's plot the line chart again
# Infection and death rates for Italy with the lockdown start and the
# one-month mark drawn as vertical lines.
fig = px.line(
    df_italy,
    x='Date',
    y=['Infection Rate', 'Deaths Rate'],
    title='Before and After Lockdown in Italy',
)

# Top of the marker lines: the maximum of the 'Infection Rate' column.
line_top = df_italy['Infection Rate'].max()

# (date, line colour, annotation text) for each vertical marker.
for marker_date, line_color, label in (
    (italy_lockdown_start_date, 'red', 'starting date of the lockdown'),
    (italy_lockdown_a_month_later, 'orange', 'one month later'),
):
    fig.add_shape(
        type='line',
        x0=marker_date,
        y0=0,
        x1=marker_date,
        y1=line_top,
        line=dict(color=line_color, width=2),
    )
    fig.add_annotation(x=marker_date, y=line_top, text=label)

fig.show()
Lockdown was started in Freiburg, Baden-Württemberg and Bavaria on 20 March 2020. Three days later, it was expanded to the whole of Germany
# Dates (ISO 'YYYY-MM-DD' strings) used to mark the German lockdown on the
# charts: the day it started and the same day one month later.
Germany_lockdown_start_date, Germany_lockdown_a_month_later = '2020-03-23', '2020-04-23'
let's select the data related to Germany
# Narrow the data down to the rows for Germany.
is_germany = df['Country'] == 'Germany'
df_germany = df[is_germany]
let's check the dataframe
df_germany
| Date | Country | Confirmed | Recovered | Deaths | |
|---|---|---|---|---|---|
| 51126 | 2020-01-27 | Germany | 1 | 0 | 0 |
| 51127 | 2020-01-28 | Germany | 4 | 0 | 0 |
| 51128 | 2020-01-29 | Germany | 4 | 0 | 0 |
| 51129 | 2020-01-30 | Germany | 4 | 0 | 0 |
| 51130 | 2020-01-31 | Germany | 5 | 0 | 0 |
| ... | ... | ... | ... | ... | ... |
| 51879 | 2022-02-18 | Germany | 13491802 | 0 | 121207 |
| 51880 | 2022-02-19 | Germany | 13605445 | 0 | 121280 |
| 51881 | 2022-02-20 | Germany | 13667353 | 0 | 121302 |
| 51882 | 2022-02-21 | Germany | 13805197 | 0 | 121607 |
| 51883 | 2022-02-22 | Germany | 14026675 | 0 | 121908 |
758 rows × 5 columns
selecting the needed column
# Keep only the date and the two cumulative counters. An explicit copy is
# taken because new columns are assigned to this frame afterwards; assigning
# into a slice of another DataFrame triggers pandas' SettingWithCopyWarning.
df_germany = df_germany[['Date', 'Confirmed', 'Deaths']].copy()
let's check it again
df_germany
| Date | Confirmed | Deaths | |
|---|---|---|---|
| 51126 | 2020-01-27 | 1 | 0 |
| 51127 | 2020-01-28 | 4 | 0 |
| 51128 | 2020-01-29 | 4 | 0 |
| 51129 | 2020-01-30 | 4 | 0 |
| 51130 | 2020-01-31 | 5 | 0 |
| ... | ... | ... | ... |
| 51879 | 2022-02-18 | 13491802 | 121207 |
| 51880 | 2022-02-19 | 13605445 | 121280 |
| 51881 | 2022-02-20 | 13667353 | 121302 |
| 51882 | 2022-02-21 | 13805197 | 121607 |
| 51883 | 2022-02-22 | 14026675 | 121908 |
758 rows × 3 columns
let's calculate the infection rate and death rate in Germany
# Day-over-day change of each cumulative counter; the first row of each new
# column is NaN because it has no predecessor.
for source_col, rate_col in (
    ('Confirmed', 'Infection Rate'),
    ('Deaths', 'Deaths Rate'),
):
    df_germany[rate_col] = df_germany[source_col].diff()
C:\Users\zinin\AppData\Local\Temp/ipykernel_16068/1992134607.py:1: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
let's check the dataframe
df_germany
| Date | Confirmed | Deaths | Infection Rate | Deaths Rate | |
|---|---|---|---|---|---|
| 51126 | 2020-01-27 | 1 | 0 | NaN | NaN |
| 51127 | 2020-01-28 | 4 | 0 | 3.0 | 0.0 |
| 51128 | 2020-01-29 | 4 | 0 | 0.0 | 0.0 |
| 51129 | 2020-01-30 | 4 | 0 | 0.0 | 0.0 |
| 51130 | 2020-01-31 | 5 | 0 | 1.0 | 0.0 |
| ... | ... | ... | ... | ... | ... |
| 51879 | 2022-02-18 | 13491802 | 121207 | 182762.0 | 210.0 |
| 51880 | 2022-02-19 | 13605445 | 121280 | 113643.0 | 73.0 |
| 51881 | 2022-02-20 | 13667353 | 121302 | 61908.0 | 22.0 |
| 51882 | 2022-02-21 | 13805197 | 121607 | 137844.0 | 305.0 |
| 51883 | 2022-02-22 | 14026675 | 121908 | 221478.0 | 301.0 |
758 rows × 5 columns
now let's plot the line chart
# Daily new cases and daily new deaths for Germany on one chart.
rate_columns = ['Infection Rate', 'Deaths Rate']
fig = px.line(data_frame=df_germany, x='Date', y=rate_columns)
fig.show()
let's do some scaling
# Divide each daily series by its own maximum so both peak at 1, then plot
# the scaled series together.
for rate_col in ('Infection Rate', 'Deaths Rate'):
    df_germany[rate_col] = df_germany[rate_col] / df_germany[rate_col].max()
fig = px.line(data_frame=df_germany, x='Date', y=['Infection Rate', 'Deaths Rate'])
fig.show()
let's plot the line chart
# Infection and death rates for Germany with the lockdown start and the
# one-month mark drawn as vertical lines.
fig = px.line(
    df_germany,
    x='Date',
    y=['Infection Rate', 'Deaths Rate'],
    title='Before and After Lockdown in Germany',
)

# Top of the marker lines: the maximum of the 'Infection Rate' column.
line_top = df_germany['Infection Rate'].max()

# (date, line colour, annotation text) for each vertical marker.
for marker_date, line_color, label in (
    (Germany_lockdown_start_date, 'black', 'starting date of the lockdown'),
    (Germany_lockdown_a_month_later, 'yellow', 'one month later'),
):
    fig.add_shape(
        type='line',
        x0=marker_date,
        y0=0,
        x1=marker_date,
        y1=line_top,
        line=dict(color=line_color, width=2),
    )
    fig.add_annotation(x=marker_date, y=line_top, text=label)

fig.show()